import glob
import os
import re

# Patterns are compiled once at import time rather than on every record.
_ISWC_RE = re.compile(r'^T\d{10}$')
_ISRC_RE = re.compile(r'^[A-Z]{2}[A-Z0-9]{3}\d{2}\d{5}$')
# "(<agency>,<code>)" with alphanumeric parts on both sides of the comma.
_AGENCY_WORK_CODE_RE = re.compile(r'^\([A-Za-z0-9]+,[A-Za-z0-9]+\)$')


def validate_record(record, file_name, line_number):
    """Validate a single tab-separated TXT record against the specification.

    Args:
        record: One raw line of text, with or without its line terminator.
        file_name: Name of the file the record came from. Not used by the
            checks; kept so existing callers keep working.
        line_number: Line number used as the prefix of every error message.

    Returns:
        A list of human-readable error strings. The list is empty when the
        record passes every check.
    """
    # Remove only the line terminator. A blanket strip() would also swallow
    # leading/trailing tabs, which shifts columns when the first field is empty
    # and shrinks the field count when the last field is empty.
    fields = record.rstrip('\r\n').split('\t')
    errors = []
    min_fields = 20  # Minimum fields up to interestedParty1/role
    expected_roles = {"CA", "AR", "SE", "PA", "ES", "AM", "SA", "C", "AD", "A", "E", "AQ", "SR", "TR"}
    publisher_roles = {"AM", "E"}
    creator_roles = {"CA", "AR", "SE", "C", "AD", "A", "AQ", "SR", "TR"}
    field_count = len(fields)

    # Check field count: 20 base fields, then one (name, nameNumber, role)
    # triple per additional interested party.
    if field_count < min_fields or (field_count - min_fields) % 3 != 0:
        errors.append(f"Line {line_number}: Incorrect field count. Need at least {min_fields} fields, with extras in multiples of 3.")

    # Mandatory fields (0, 1, 2, 3, 4, 5, 7, 8, 14, 17, 18, 19)
    mandatory_fields = [0, 1, 2, 3, 4, 5, 7, 8, 14, 17, 18, 19]
    for i in mandatory_fields:
        if i < field_count and not fields[i].strip():
            errors.append(f"Line {line_number}: Mandatory field {i} is empty.")

    # Validate recordType (0) - must be exactly 'findSubmissions' (lowercase)
    if field_count > 0 and fields[0].strip() != "findSubmissions":
        errors.append(f"Line {line_number}: recordType must be exactly 'findSubmissions' (lowercase), got '{fields[0]}'.")

    # Validate submittingPublisher/role (6). Compare the stripped value so
    # surrounding whitespace is tolerated the same way as for other fields.
    if field_count > 6:
        publisher_role = fields[6].strip()
        if publisher_role and publisher_role not in publisher_roles:
            errors.append(f"Line {line_number}: submittingPublisher/role must be 'AM' or 'E', got '{fields[6]}'.")

    # Validate disambiguation (9)
    disambiguation = fields[9].strip().lower() if field_count > 9 else ""
    if disambiguation and disambiguation not in {"true", "false"}:
        errors.append(f"Line {line_number}: disambiguation must be 'true' or 'false', got '{disambiguation}'.")

    # Conditional disambiguationReason (10) and disambiguateFrom (11)
    if disambiguation == "true":
        if field_count <= 10 or not fields[10].strip():
            errors.append(f"Line {line_number}: disambiguationReason required when disambiguation is true.")
        if field_count <= 11 or not fields[11].strip():
            errors.append(f"Line {line_number}: disambiguateFrom required when disambiguation is true.")
        else:
            # Pipe-separated list; empty items are skipped.
            for iswc in fields[11].split("|"):
                candidate = iswc.strip()
                if candidate and not _ISWC_RE.match(candidate):
                    errors.append(f"Line {line_number}: Invalid ISWC '{iswc}' in disambiguateFrom.")

    # Validate derivedWorkType (12) and derivedFromIswcs (13)
    derived_work_type = fields[12].strip() if field_count > 12 else ""
    if derived_work_type and derived_work_type not in {"ModifiedVersion", "Excerpt", "Composite"}:
        errors.append(f"Line {line_number}: derivedWorkType must be 'ModifiedVersion', 'Excerpt', or 'Composite', got '{derived_work_type}'.")
    if derived_work_type and (field_count <= 13 or not fields[13].strip()):
        errors.append(f"Line {line_number}: derivedFromIswcs required when derivedWorkType is set.")

    # Validate ISRCs (15)
    if field_count > 15 and fields[15].strip():
        for isrc in fields[15].split("|"):
            candidate = isrc.strip()
            if candidate and not _ISRC_RE.match(candidate):
                errors.append(f"Line {line_number}: Invalid ISRC '{isrc}' in additionalIdentifiers/ISRCs.")

    # Validate agencyWorkCodes (16)
    if field_count > 16 and fields[16].strip():
        for code in fields[16].split("|"):
            candidate = code.strip()
            if candidate and not _AGENCY_WORK_CODE_RE.match(candidate):
                errors.append(f"Line {line_number}: Invalid agencyWorkCode '{code}' in additionalIdentifiers/agencyWorkCodes.")

    # Validate interested parties. Each party occupies three consecutive
    # fields starting at index 17; a short record is still checked for party 1.
    ip_count = (field_count - min_fields) // 3 + 1 if field_count >= min_fields else 1
    creator_role_found = False
    for i in range(ip_count):
        name_idx = 17 + i * 3
        name_number_idx = 18 + i * 3
        role_idx = 19 + i * 3
        if name_idx < field_count and not fields[name_idx].strip():
            errors.append(f"Line {line_number}: interestedParty{i+1}/name (field {name_idx}) is mandatory.")
        if name_number_idx < field_count and not fields[name_number_idx].strip():
            errors.append(f"Line {line_number}: interestedParty{i+1}/nameNumber (field {name_number_idx}) is mandatory.")
        if role_idx < field_count:
            role = fields[role_idx].strip()
            if not role:
                errors.append(f"Line {line_number}: interestedParty{i+1}/role (field {role_idx}) is mandatory.")
            elif role not in expected_roles:
                errors.append(f"Line {line_number}: interestedParty{i+1}/role must be one of {expected_roles}, got '{role}'.")
            if role in creator_roles:
                creator_role_found = True

    # Check for creator role
    if not creator_role_found:
        errors.append(f"Line {line_number}: At least one interested party must have a creator role (CA, AR, SE, C, AD, A, AQ, SR, TR).")

    return errors

# Work relative to the folder this script lives in, so the glob below only
# picks up the .txt files sitting next to it.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Validate every .txt file in that folder, printing a verdict per non-blank line.
for file_name in glob.glob("*.txt"):
    print(f"Validating file: {file_name}")
    try:
        # Read the whole file up front; a read/decode failure is reported
        # before any per-line output is produced for this file.
        with open(file_name, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        for line_number, record in enumerate(lines, start=1):
            if not record.strip():
                continue  # Blank lines are not records

            errors = validate_record(record, file_name, line_number)
            if errors:
                print(f"Line {line_number}: Record does not match specification :( ... details follow\n")
                print("\n".join(errors))
                print()
            else:
                print(f"Line {line_number}: Record matches specification :)\n")
    except Exception as e:
        # Any failure while reading or validating aborts this file only.
        print(f"Error reading file {file_name}: {str(e)}\n")